{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "from IPython.core.interactiveshell import InteractiveShell\n",
    "# Display the value of every top-level expression in a cell, not only the last one;\n",
    "# several cells below rely on this to show more than one result.\n",
    "InteractiveShell.ast_node_interactivity = 'all'  # default is 'last_expr'\n",
    "\n",
    "# Automatically reload imported modules so that edits to them are picked up\n",
    "%load_ext autoreload\n",
    "%autoreload 2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "import sys\n",
    "\n",
    "# Extra import locations, appended to the module search path in this order\n",
    "sys.path.extend([\n",
    "    '/home/mink/notebooks/CameraTraps',  # this repo\n",
    "    '/home/mink/lib/ai4eutils',\n",
    "])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "from collections import Counter, defaultdict\n",
    "from random import sample\n",
    "from shutil import copyfile\n",
    "from multiprocessing.pool import ThreadPool\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from tqdm import tqdm\n",
    "\n",
    "import path_utils  # ai4eutils\n",
    "\n",
    "from data_management.megadb.schema import sequences_schema_check\n",
    "from data_management.megadb.converters.cct_to_megadb import process_sequences"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# fws_hawaii_kauai_forest_birds_a24s"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Dataset name: stored in every sequence item and used in the output file names\n",
    "dataset_name = 'fws_hawaii_kauai_forest_birds_a24s'\n",
    "\n",
    "# Images are listed under container_root + path_prefix; the `file` field of each\n",
    "# image is the path relative to that folder\n",
    "container_root = '/mink_disk_0/camtraps/hawaii-fws-upload/'\n",
    "path_prefix = 'A24s/'\n",
    "\n",
    "# NOTE(review): path_to_output is not written by any cell in this notebook - confirm it is still needed\n",
    "path_to_output = f'/home/mink/camtraps/data/megadb_jsons/{dataset_name}.json' \n",
    "# Receives the full list of sequence items after the schema check in Step 2\n",
    "path_to_output_temp = f'/home/mink/camtraps/data/megadb_jsons/{dataset_name}_temp.json' "
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 0 - Add an entry to the `datasets` table\n",
    "\n",
    "Done"
   ]
  },
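  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 1 - Prepare the `sequence` objects to insert into the database\n",
    "\n",
    "No species or sequence information is available for this dataset, so every image becomes its own single-frame sequence with the class `__label_unavailable`.\n",
    "\n",
    "The location is inferred from the folder structure. The rule is different for the two top-level sub-folders (`HPK` and `MOH`), so they are processed separately below."
   ]
  },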
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "315362"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "313769"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "folder = os.path.join(container_root, path_prefix)\n",
    "\n",
    "# every file below the dataset folder, whether it is an image or not\n",
    "all_files = path_utils.recursive_file_list(folder)\n",
    "len(all_files)\n",
    "\n",
    "# Keep image files only, as paths relative to `folder`. Sorting puts files of the same\n",
    "# directory next to each other in file-name order, which the sampling in Step 2b relies on.\n",
    "paths = sorted(os.path.relpath(f, folder) for f in all_files if path_utils.is_image_file(f))\n",
    "len(paths)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'HPK/100RECNX/IMG_0101.JPG'"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "paths[100]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### HPK folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "hpk_sequences = []\n",
    "hpk_locations = set()\n",
    "\n",
    "for p in paths:\n",
    "    if p.startswith('HPK'):\n",
    "        p_parts = p.split('/')\n",
    "\n",
    "        # A second path component starting with '10' (e.g. 100RECNX, 101RECNX) is grouped\n",
    "        # under one catch-all location instead of being used as a location name.\n",
    "        location = 'HPK_root' if p_parts[1].startswith('10') else p_parts[1]\n",
    "        hpk_locations.add(location)\n",
    "\n",
    "        # One single-frame placeholder sequence per image: the id is the path below HPK/\n",
    "        # joined with '_', cut at the first '.', with spaces removed.\n",
    "        path_stem = '_'.join(p_parts[1:]).split('.')[0]\n",
    "        seq_id = 'dummy_' + path_stem.replace(' ', '')\n",
    "\n",
    "        hpk_sequences.append({\n",
    "            'dataset': dataset_name,\n",
    "            'seq_id': seq_id,\n",
    "            'location': location,\n",
    "            'class': ['__label_unavailable'],\n",
    "            'images': [{'file': p, 'frame_num': 1}]\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "13"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'Camera 1',\n",
       " 'Camera 2',\n",
       " 'Camera 4',\n",
       " 'Camera 5',\n",
       " 'Camera 6',\n",
       " 'Camera 7',\n",
       " 'Camera 8',\n",
       " 'Camera 9',\n",
       " 'Fall 2017_feral cat',\n",
       " 'G8 Camera',\n",
       " 'H2 Rat Cam 2017 or 2018',\n",
       " 'HPK_root',\n",
       " 'misc HPK trap monitoring 2017ish'}"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(hpk_locations)\n",
    "hpk_locations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "80584"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "[{'dataset': 'fws_hawaii_kauai_forest_birds_a24s',\n",
       "  'seq_id': 'dummy_G8Camera_DCIM_100EK113_PICT1029',\n",
       "  'location': 'G8 Camera',\n",
       "  'class': ['__label_unavailable'],\n",
       "  'images': [{'file': 'HPK/G8 Camera/DCIM/100EK113/PICT1029.JPG',\n",
       "    'frame_num': 1}]},\n",
       " {'dataset': 'fws_hawaii_kauai_forest_birds_a24s',\n",
       "  'seq_id': 'dummy_101RECNX_IMG_4491',\n",
       "  'location': 'HPK_root',\n",
       "  'class': ['__label_unavailable'],\n",
       "  'images': [{'file': 'HPK/101RECNX/IMG_4491.JPG', 'frame_num': 1}]},\n",
       " {'dataset': 'fws_hawaii_kauai_forest_birds_a24s',\n",
       "  'seq_id': 'dummy_Camera9_100RECNX_IMG_0545',\n",
       "  'location': 'Camera 9',\n",
       "  'class': ['__label_unavailable'],\n",
       "  'images': [{'file': 'HPK/Camera 9/100RECNX/IMG_0545.JPG', 'frame_num': 1}]}]"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(hpk_sequences)\n",
    "sample(hpk_sequences, 3)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### MOH folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "moh_sequences = []\n",
    "moh_locations = set()\n",
    "\n",
    "for p in paths:\n",
    "    if p.startswith('MOH'):\n",
    "        p_parts = p.split('/')\n",
    "\n",
    "        if p_parts[1] == 'Spring19':\n",
    "            # MOH/Spring19/<date range>/<location>[ (note)]/... - drop any parenthesised note\n",
    "            location = p_parts[3].split('(')[0].strip()\n",
    "        else:\n",
    "            # everything else, i.e. 'Pilot Study Winter 2018-19': MOH/<study>/<location>/...\n",
    "            location = p_parts[2]\n",
    "\n",
    "        # a trailing lowercase 'a' or 'b' is removed from the location name\n",
    "        if location[-1:] in ('a', 'b'):\n",
    "            location = location[:-1]\n",
    "\n",
    "        moh_locations.add(location)\n",
    "\n",
    "        # One single-frame placeholder sequence per image: the id is the path below MOH/\n",
    "        # joined with '_', cut at the first '.', with spaces removed.\n",
    "        path_stem = '_'.join(p_parts[1:]).split('.')[0]\n",
    "        seq_id = 'dummy_' + path_stem.replace(' ', '')\n",
    "\n",
    "        moh_sequences.append({\n",
    "            'dataset': dataset_name,\n",
    "            'seq_id': seq_id,\n",
    "            'location': location,\n",
    "            'class': ['__label_unavailable'],\n",
    "            'images': [{'file': p, 'frame_num': 1}]\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "89"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "{'CMA02',\n",
       " 'CMA03',\n",
       " 'CMA04',\n",
       " 'CMA05',\n",
       " 'CMA07',\n",
       " 'CMA09',\n",
       " 'CMA10',\n",
       " 'CMA11',\n",
       " 'CMA13',\n",
       " 'CMB01',\n",
       " 'CMD03',\n",
       " 'CMD04',\n",
       " 'CMD05',\n",
       " 'CMD06',\n",
       " 'CMD07',\n",
       " 'CMD08',\n",
       " 'CMD09',\n",
       " 'CMD10',\n",
       " 'CMD11',\n",
       " 'CMD12',\n",
       " 'CMD13',\n",
       " 'CMD14',\n",
       " 'CMD15',\n",
       " 'CMG16',\n",
       " 'CMG17',\n",
       " 'CMG18',\n",
       " 'CMG19',\n",
       " 'CMG20',\n",
       " 'CMG21',\n",
       " 'CMG22',\n",
       " 'CMG23',\n",
       " 'CMG24',\n",
       " 'CMG25',\n",
       " 'CMG26',\n",
       " 'MD01',\n",
       " 'MD08',\n",
       " 'MD16',\n",
       " 'MD17',\n",
       " 'MD18',\n",
       " 'MD19',\n",
       " 'ME01',\n",
       " 'ME02',\n",
       " 'ME03',\n",
       " 'ME04',\n",
       " 'ME06',\n",
       " 'ME07',\n",
       " 'ME08',\n",
       " 'ME10',\n",
       " 'ME12',\n",
       " 'ME13',\n",
       " 'ME15',\n",
       " 'ME16',\n",
       " 'ME17',\n",
       " 'ME18',\n",
       " 'ME20',\n",
       " 'ME21',\n",
       " 'MF03',\n",
       " 'MF04',\n",
       " 'MF05',\n",
       " 'MG03',\n",
       " 'MG04',\n",
       " 'MG05',\n",
       " 'MG07',\n",
       " 'MG08',\n",
       " 'MG09',\n",
       " 'MG10',\n",
       " 'MG12',\n",
       " 'MG13',\n",
       " 'MG15',\n",
       " 'MH01',\n",
       " 'MH02',\n",
       " 'MH03',\n",
       " 'MH04',\n",
       " 'MH05',\n",
       " 'MH06',\n",
       " 'MH07',\n",
       " 'MH08',\n",
       " 'MH09',\n",
       " 'MH11',\n",
       " 'MH12',\n",
       " 'MI01',\n",
       " 'MI02',\n",
       " 'MI03',\n",
       " 'MJ01',\n",
       " 'MJ02',\n",
       " 'MJ04',\n",
       " 'MJ05',\n",
       " 'MJ07',\n",
       " 'MJ10'}"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(moh_locations)\n",
    "moh_locations"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "233185"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "[{'dataset': 'fws_hawaii_kauai_forest_birds_a24s',\n",
       "  'seq_id': 'dummy_Spring19_April5-10_MH09_R05_IMG_1708',\n",
       "  'location': 'MH09',\n",
       "  'class': ['__label_unavailable'],\n",
       "  'images': [{'file': 'MOH/Spring19/April5-10/MH09/R05/IMG_1708.JPG',\n",
       "    'frame_num': 1}]},\n",
       " {'dataset': 'fws_hawaii_kauai_forest_birds_a24s',\n",
       "  'seq_id': 'dummy_PilotStudyWinter2018-19_MD16_2ndRoundJan_CMD16B_DCIM_101EK113_EK005325',\n",
       "  'location': 'MD16',\n",
       "  'class': ['__label_unavailable'],\n",
       "  'images': [{'file': 'MOH/Pilot Study Winter 2018-19/MD16/2nd Round Jan/CMD16B/DCIM/101EK113/EK005325.JPG',\n",
       "    'frame_num': 1}]}]"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(moh_sequences)\n",
    "sample(moh_sequences, 2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Combine the two folders"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "# all sequence items of the dataset: HPK first, then MOH\n",
    "sequences = [*hpk_sequences, *moh_sequences]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "313769"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "102"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(hpk_locations) + len(moh_locations)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Step 2 - Pass the schema check\n",
    "\n",
    "Once your metadata are in the MegaDB format for `sequence` items, we check that they conform to the format's schema.\n",
    "\n",
    "If the format conforms, the following messages will be printed:\n",
    "\n",
    "```\n",
    "Verified that the sequence items meet requirements not captured by the schema.\n",
    "Verified that the sequence items conform to the schema.\n",
    "```\n",
    "\n",
    "For large datasets, the second step will take some time (~ a minute). \n",
    "\n",
    "Otherwise there will be an error message describing what's wrong. Please fix the issues until all checks are passed. You might need to write some snippets of code to loop through the `sequence` items to understand which entries have problems."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Verified that the sequence items meet requirements not captured by the schema.\n",
      "Verified that the sequence items conform to the schema.\n",
      "CPU times: user 37.5 s, sys: 1.89 ms, total: 37.5 s\n",
      "Wall time: 37.5 s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "sequences_schema_check.sequences_schema_check(sequences)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the output folder if needed so the dump does not fail on a fresh machine\n",
    "os.makedirs(os.path.dirname(path_to_output_temp), exist_ok=True)\n",
    "\n",
    "# Write all sequence items that passed the schema check; ensure_ascii=False keeps any\n",
    "# non-ASCII characters in file names readable, hence the explicit utf-8 encoding\n",
    "with open(path_to_output_temp, 'w', encoding='utf-8') as f:\n",
    "    json.dump(sequences, f, indent=1, ensure_ascii=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2b - sample images"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Want to sample 100k from every low-level folder, while keeping images together so annotators can see the sequences if they're naturally sequential"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Group image basenames by the folder that directly contains them. `sequences` was built\n",
    "# by walking the sorted `paths`, so each list is already in file-name order.\n",
    "folder_to_basenames = defaultdict(list)\n",
    "\n",
    "for seq in sequences:\n",
    "    # visit every image of the sequence, not only the first one\n",
    "    for im in seq['images']:\n",
    "        # `im_folder` rather than `folder`, which holds the dataset root from Step 1\n",
    "        im_folder, basename = os.path.split(im['file'])\n",
    "        folder_to_basenames[im_folder].append(basename)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "311"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(folder_to_basenames)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 311/311 [00:00<00:00, 1839.35it/s]\n"
     ]
    }
   ],
   "source": [
    "BATCH_SIZE = 10  # number of consecutive images kept together\n",
    "KEEP_EVERY = 3   # keep one batch out of every 3, i.e. about a third of each folder\n",
    "\n",
    "images_to_include = []\n",
    "\n",
    "for im_folder, basenames in tqdm(folder_to_basenames.items()):\n",
    "    # Stepping by BATCH_SIZE * KEEP_EVERY selects batches 0, 3, 6, ... of the folder,\n",
    "    # so the first BATCH_SIZE images of every folder are always included.\n",
    "    for start in range(0, len(basenames), BATCH_SIZE * KEEP_EVERY):\n",
    "        batch = basenames[start: start + BATCH_SIZE]\n",
    "        images_to_include.extend(os.path.join(im_folder, name) for name in batch)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "105670"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "105670"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(images_to_include)\n",
    "set_images_to_include = set(images_to_include)\n",
    "len(set_images_to_include)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "['MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0301.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0302.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0303.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0304.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0305.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0306.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0307.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0308.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0309.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0310.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0331.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0332.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0333.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0334.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0335.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0336.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0337.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0338.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0339.JPG',\n",
       " 'MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0340.JPG']"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "images_to_include[-20:]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Step 2c - copy images to flat folder"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [],
   "source": [
    "def copy_file(src_path, dst_path):\n",
    "    return copyfile(src_path, dst_path)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "100%|██████████| 313769/313769 [00:24<00:00, 12731.21it/s]"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 1.85 s, sys: 934 ms, total: 2.79 s\n",
      "Wall time: 24.6 s\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "src_root = os.path.join(container_root, path_prefix)  # `file` entries are relative to this\n",
    "dst_root = '/mink_disk_0/camtraps/imerit12c'  # flat folder that receives the sampled images\n",
    "\n",
    "path_pairs = []  # (source path, destination path) for every sampled image\n",
    "for seq in tqdm(sequences):\n",
    "    seq_id = seq['seq_id']\n",
    "    for im in seq['images']:\n",
    "        if im['file'] not in set_images_to_include:\n",
    "            continue\n",
    "\n",
    "        src_path = os.path.join(src_root, im['file'])\n",
    "        assert os.path.exists(src_path), src_path\n",
    "\n",
    "        # Use the image's own frame number (1 if absent) so that several frames of one\n",
    "        # sequence can never be mapped to the same destination file.\n",
    "        frame = im.get('frame_num', 1)\n",
    "        dst_path = os.path.join(dst_root, f'{dataset_name}.seq{seq_id}.frame{frame}.jpg')\n",
    "        path_pairs.append((src_path, dst_path))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "105670"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    },
    {
     "data": {
      "text/plain": [
       "('/mink_disk_0/camtraps/hawaii-fws-upload/A24s/MOH/Spring19/May30-June3/ME21 (ATC 01)/RCNX0061.JPG',\n",
       " '/mink_disk_0/camtraps/imerit12c/fws_hawaii_kauai_forest_birds_a24s.seqdummy_Spring19_May30-June3_ME21(ATC01)_RCNX0061.frame1.jpg')"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(path_pairs)\n",
    "path_pairs[-100]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "96"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len('fws_hawaii_kauai_forest_birds_a24s.seqdummy_Spring19_May30-June3_ME21(ATC01)_RCNX0061.frame1.jpg')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPU times: user 51 s, sys: 2min 19s, total: 3min 10s\n",
      "Wall time: 8min 37s\n"
     ]
    }
   ],
   "source": [
    "%%time\n",
    "\n",
    "# copyfile does not create missing folders, so make sure every destination folder exists\n",
    "for out_dir in {os.path.dirname(dst_path) for _, dst_path in path_pairs}:\n",
    "    os.makedirs(out_dir, exist_ok=True)\n",
    "\n",
    "# Copy in 8 worker threads; starmap unpacks each (src_path, dst_path) pair into\n",
    "# copy_file's two arguments and returns the results in input order\n",
    "with ThreadPool(8) as pool:\n",
    "    dst_paths = pool.starmap(copy_file, path_pairs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "105670"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(dst_paths)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python [conda env:cameratraps] *",
   "language": "python",
   "name": "conda-env-cameratraps-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
